import plotly.express as px
import pandas as pd
# df = pd.read_parquet("era5-pds/measurements-m1.parquet")
# df = pd.read_parquet("era5-pds/measurements-i10k.parquet")
# df = pd.read_parquet("era5-pds/measurements-ryzen3.parquet")
# df = pd.read_parquet("era5-pds/measurements-i13k.parquet")
# Load the measurements and keep only the rows with clevel > 0, i.e. drop the
# "no compression" results.
df = pd.read_parquet("era5-pds/measurements-i13k-always-split.parquet").query("clevel > 0")

# Fixed display order for the categorical columns used in the figures.
category_orders = {
    "dset": ["flux", "wind", "pressure", "precip", "snow"],
    "filter": ["nofilter", "shuffle", "bitshuffle", "bytedelta"],
}

# Human-readable titles keyed by DataFrame column name.  The "cratio * ..."
# entries refer to the product columns that are added to `df` further down.
labels = {
    # measured quantities
    "cratio": "Compression ratio (x times)",
    "cspeed": "Compression speed (GB/s)",
    "dspeed": "Decompression speed (GB/s)",
    # categorical columns
    "codec": "Codec",
    "dset": "Dataset",
    "filter": "Filter",
    # derived figures of merit
    "cratio * cspeed": "Compression ratio x Compression speed",
    "cratio * dspeed": "Compression ratio x Decompression speed",
    "cratio * cspeed * dspeed": "Compression ratio x Compression x Decompression speeds",
}
# Horizontal box plot of the compression ratio, coloured by filter, with every
# individual measurement drawn as a point (points="all").
hover_data = {
    "filter": False,  # not shown in the hover box
    "codec": True,
    "cratio": ":.1f",
    "cspeed": ":.2f",
    "dspeed": ":.2f",
    "dset": True,
    "clevel": True,
}
fig = px.box(
    df,
    x="cratio",
    color="filter",
    points="all",
    labels=labels,
    hover_data=hover_data,
    # hand-picked axis limits
    range_x=(0, 60),
    range_y=(-0.4, 0.35),
)
# Title starts a quarter of the way across the figure, anchored on its left edge.
fig.update_layout(
    title=dict(
        text="Compression ratio vs filter (larger is better)",
        x=0.25,
        xanchor="left",
    ),
)
fig.show()
# Strip plots of the compression ratio of every measurement, first grouped by
# dataset and then by codec.  In each plot the column drawn on the x axis is
# switched off in the hover box.
#
# Both plots pass `category_orders` so that the filters get the same legend
# order and colours in the two figures (the per-codec plot used to omit it).
hover_data = {"filter": False, "codec": True, "cratio": ":.1f", "cspeed": ":.2f",
              "dspeed": ":.2f", "dset": False, "clevel": True}
fig = px.strip(df, y="cratio", x="dset", color="filter", hover_data=hover_data,
               labels=labels, category_orders=category_orders)
fig.show()

# NOTE: this second `hover_data` is left bound on purpose; the per-codec speed
# strip plots later in the script reuse it.
hover_data = {"filter": False, "codec": False, "cratio": ":.1f", "cspeed": ":.2f",
              "dspeed": ":.2f", "dset": True, "clevel": True}
fig = px.strip(df, y="cratio", x="codec", color="filter", hover_data=hover_data,
               labels=labels, category_orders=category_orders)
fig.show()
# Derived figures of merit: the compression ratio multiplied by the speeds.
df["cratio * cspeed"] = df["cratio"] * df["cspeed"]
df["cratio * dspeed"] = df["cratio"] * df["dspeed"]
# Same left-to-right product as cratio * cspeed * dspeed, reusing the column above.
df["cratio * cspeed * dspeed"] = df["cratio * cspeed"] * df["dspeed"]

# Mean of every numeric column per group, with the group keys moved back from
# the index into ordinary columns.
df_mean = (
    df.groupby(["filter", "clevel", "codec"])
    .mean(numeric_only=True)
    .reset_index()
)
df_mean2 = (
    df.groupby(["filter", "dset"])
    .mean(numeric_only=True)
    .reset_index()
)
df_mean  # bare expression: displays the frame when run as a notebook cell
| | filter | clevel | codec | cspeed | dspeed | cratio | cratio * cspeed | cratio * dspeed | cratio * cspeed * dspeed |
|---|---|---|---|---|---|---|---|---|---|
| 0 | bitshuffle | 1 | BLOSCLZ | 7.812132 | 59.637610 | 8.818274 | 84.678041 | 582.691180 | 5698.753431 |
| 1 | bitshuffle | 1 | LZ4 | 8.963989 | 67.832034 | 11.901284 | 120.736520 | 869.704803 | 8906.433227 |
| 2 | bitshuffle | 1 | LZ4HC | 6.225950 | 63.294638 | 13.051075 | 112.823004 | 932.375102 | 8331.310817 |
| 3 | bitshuffle | 1 | ZLIB | 7.260142 | 26.454652 | 11.757224 | 115.651208 | 384.140367 | 4084.571362 |
| 4 | bitshuffle | 1 | ZSTD | 9.928984 | 46.097012 | 16.468393 | 222.639905 | 881.153048 | 12308.373623 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75 | shuffle | 9 | BLOSCLZ | 10.061537 | 64.679785 | 11.330588 | 178.281972 | 962.654177 | 16860.197460 |
| 76 | shuffle | 9 | LZ4 | 10.796296 | 83.420264 | 11.119430 | 171.031707 | 1151.295967 | 18953.235240 |
| 77 | shuffle | 9 | LZ4HC | 1.988419 | 91.030634 | 14.480430 | 59.076848 | 1617.284874 | 7354.570322 |
| 78 | shuffle | 9 | ZLIB | 0.899892 | 20.038016 | 17.874545 | 28.275532 | 463.741267 | 789.535034 |
| 79 | shuffle | 9 | ZSTD | 0.124527 | 49.667825 | 19.741196 | 3.115200 | 1419.451927 | 244.997181 |
80 rows × 9 columns
# Mean compression ratio per codec, one facet per compression level.
fig = px.bar(df_mean, y="cratio", x="codec", color="filter", category_orders=category_orders,
             barmode="group", facet_col="clevel", labels=labels, title="Compression ratio (mean)")
fig.show()

# For each speed column, show three figures in this order:
#   1. mean speed per codec, faceted by compression level;
#   2. mean speed per filter, faceted by dataset (log scale);
#   3. every individual measurement per codec.
# All of them pass `category_orders` so the filters keep the same legend order
# and colours from one figure to the next (the strip plots used to omit it).
# `hover_data` is the dict defined for the earlier per-codec strip plot.
for _speed_col, _speed_title in (
    ("cspeed", "Compression speed (mean)"),
    ("dspeed", "Decompression speed (mean)"),
):
    fig = px.bar(df_mean, y=_speed_col, x="codec", color="filter",
                 category_orders=category_orders, barmode="group",
                 facet_col="clevel", labels=labels, title=_speed_title)
    fig.show()
    fig = px.bar(df_mean2, y=_speed_col, x="filter", facet_col="dset", color="filter",
                 log_y=True, labels=labels, category_orders=category_orders)
    fig.show()
    fig = px.strip(df, y=_speed_col, x="codec", color="filter", hover_data=hover_data,
                   labels=labels, category_orders=category_orders)
    fig.show()
# Trade-off between compression ratio and (de)compression speed.
#
# `hover_data` is defined once (the original repeated the identical dict before
# the decompression plots) and stays bound for the combined-merit box plot that
# follows this section.
hover_data = {"filter": True, "codec": True, "cratio": ":.1f", "cspeed": ":.2f",
              "dspeed": ":.2f", "dset": True, "clevel": True}

# For each speed column, show four figures in this order:
#   1. scatter of compression ratio against the speed;
#   2. box plot of ratio * speed per codec;
#   3. mean ratio * speed per codec, faceted by compression level;
#   4. mean ratio * speed per filter, faceted by dataset.
# Every figure passes `category_orders` so the filters keep the same legend
# order and colours throughout; previously the scatters and the
# "cratio * cspeed" box plot omitted it, unlike the "cratio * dspeed" box plot.
for _speed_col in ("cspeed", "dspeed"):
    # Name of the product column created earlier, e.g. "cratio * cspeed".
    _merit_col = f"cratio * {_speed_col}"

    fig = px.scatter(df, y="cratio", x=_speed_col, color="filter", log_y=True,
                     hover_data=hover_data, labels=labels, category_orders=category_orders)
    fig.show()
    fig = px.box(df, y=_merit_col, x="codec", color="filter", log_y=True,
                 hover_data=hover_data, labels=labels, category_orders=category_orders)
    fig.show()
    fig = px.bar(df_mean, y=_merit_col, x="codec", color="filter", log_y=True,
                 labels=labels, facet_col="clevel", barmode="group",
                 category_orders=category_orders)
    fig.show()
    fig = px.bar(df_mean2, y=_merit_col, x="filter", facet_col="dset", color="filter",
                 log_y=True, labels=labels, category_orders=category_orders)
    fig.show()
# Combined figure of merit (ratio x compression speed x decompression speed),
# always on a log scale.

# Distribution of the individual measurements per codec.
fig = px.box(
    df,
    x="codec",
    y="cratio * cspeed * dspeed",
    color="filter",
    category_orders=category_orders,
    labels=labels,
    hover_data=hover_data,
    log_y=True,
)
fig.show()

# Mean per codec, one facet per compression level.
fig = px.bar(
    df_mean,
    x="codec",
    y="cratio * cspeed * dspeed",
    color="filter",
    barmode="group",
    facet_col="clevel",
    category_orders=category_orders,
    labels=labels,
    log_y=True,
)
fig.show()

# Mean per filter, one facet per dataset.
fig = px.bar(
    df_mean2,
    x="filter",
    y="cratio * cspeed * dspeed",
    color="filter",
    facet_col="dset",
    category_orders=category_orders,
    labels=labels,
    log_y=True,
)
fig.show()